---
title: "Analysis code for 'People have shaped most of terrestrial nature for at least 12,000 years'"
author: "Nicolas Gauthier"
date: "Last knit on: `r Sys.Date()`"
output:
  pdf_document: 
    toc: yes
    latex_engine: xelatex
    highlight: pygments
  html_document: default
---

# Setup

Load **tidyverse** and **sf** packages for data summaries and visualization, and **patchwork** for multi-panel plots. Also load this package, **anthromes** for additional analysis and plotting functions.

```{r setup, message=FALSE}
library(tidyverse)
library(sf)
library(patchwork)
devtools::load_all() # imports this package
```

Import the precomputed Anthromes-12k-DGG dataset. These include fixed inputs like land area, region, and biome as well as the anthrome classifications for HYDE 3.2 baseline, upper, and lower scenarios.

```{r}
# HYDE/Anthromes fixed inputs
an12_dgg_inputs <- read_sf('data/an12_dgg_inputs.shp') %>% 
  st_drop_geometry() %>% # don't need the locations
  rename(land_area = land_ar, pot_vill = pot_vll, region_name = regn_nm) %>%
  mutate(region_name = if_else( # fix the region name
    region_name == 'Latin America', 'Latin America and Caribbean', 
    region_name)) %>%
  left_join(biome_key, by = c('pot_veg' = 'biome_value')) %>% 
  select(-pot_vill, -pot_veg)
```

```{r}
# Baseline, upper, and lower scenarios
an12_dgg_baseline <- read_anthromes('data/an12_dgg_baseline.csv')
an12_dgg_upper <- read_anthromes('data/an12_dgg_upper.csv')
an12_dgg_lower <- read_anthromes('data/an12_dgg_lower.csv')

# Combine in one tibble
an12_dgg <- an12_dgg_baseline %>%
  rename(baseline = anthrome) %>%
  mutate(upper = an12_dgg_upper$anthrome,
         lower = an12_dgg_lower$anthrome)%>%
  mutate(time_step = ordered(time_step,levels = time_steps_ordered), 
         .after = id) %>%
  left_join(an12_dgg_inputs, by = 'id')
```

```{r include=FALSE}
# remove unneeded files to save RAM
rm(an12_dgg_baseline, an12_dgg_upper, an12_dgg_lower)
```

Import the HYDE 3.2 population estimates.
```{r}
hyde_pop <- readRDS('data/derived_data/hyde_dgg') %>%
  filter(var == 'popc') %>%
  select(-var) %>%
  pivot_longer(-ANL12_ID, names_to = 'time_step', values_to = 'population') %>%
  mutate(time_step = ordered(time_step, levels = time_steps_ordered))
```

Import contemporary biodiversity and conservation variables.
```{r}
total_land_area <- sum(an12_dgg_inputs$land_area)

contemp_vars <- read_sf('data/contemp_vars.shp') %>%
  st_drop_geometry() %>%
  mutate(
    land_area = land_ar,
    pa50 = pa_km2 >= (0.5 * land_ar),
    ind50 = ind_cnt >= (0.5 * land_ar),
    kba50 = kba_km2 >= (0.5 * land_ar),
    v_rich = round(v_rich),
    v_thr = round(v_thr),
    hfp_max = round(hfp_max),
    tgc = X_3_c_4,
    nmh3 = nmh_34 - nmh_4
  ) %>%
  select(-c(X_3_c_4, max, land_ar, landsut, pop17, food_cl, mean, nmh, L1_ID))
```

Create a convenience tibble in "wide" format for the regression analysis in Figure 4.
```{r}
regression_dat <- an12_dgg %>%
  filter(time_step %in% c(
    time_steps_millennia,
    time_steps_centuries,
    '2010AD',
    '2017AD'
  )) %>%
  left_join(anthrome_key, by = c('baseline' = 'anthrome')) %>%
  select(id, time_step, region_name, land_area, class) %>%
  pivot_wider(names_from = time_step, values_from = class) %>%
  left_join(select(contemp_vars,-land_area), by = 'id') %>%
  na.omit() %>%
  rename_with( ~ paste0('an_', .), all_of(unique(
    c(time_steps_millennia, time_steps_centuries,
      '2010AD',
      '2017AD'
    )
  )))
```

# Main Analysis

The code below is sufficient to reproduce the analysis presented in the main text.

## Anthrome Trajectory Summaries

Create summaries of global land areas for each anthrome along with breakdowns by region and biome. See the documentation for the *anthrome_trajectory()* function for more details.
```{r, cache = TRUE}
global_summary <- an12_dgg %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>% 
  anthrome_trajectory()

regional_summary <- an12_dgg %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>% 
  anthrome_trajectory(by = region_name)

biome_summary <- an12_dgg %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>% 
  anthrome_trajectory(by = biome)
```

Do the same thing for conservation and biodiversity variables. Note the distinction between the filtered variables and the summed ones for which the preprocessing is more complex.
```{r, cache = TRUE}
# Indigenous lands
ind_summary <- contemp_vars %>%
  select(id, ind_cnt) %>%
  rename(land_area = ind_cnt) %>%
  remove_missing() %>% # remove missing cells
  # this join here has two land areas 
  left_join(select(an12_dgg, -land_area), by = 'id') %>%  
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>%
  anthrome_trajectory() %>%
  mutate(var = 'Indigenous Lands')

# Protected areas
pa_summary <- contemp_vars %>%
  select(id, pa_km2) %>%
  rename(land_area = pa_km2) %>%
  remove_missing() %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>%
  anthrome_trajectory() %>%
  mutate(var = 'Protected Areas')

# Key Biodiversity Areas
kba_summary <- contemp_vars %>%
  select(id, kba_km2) %>%
  rename(land_area = kba_km2) %>%
  remove_missing() %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline)%>%
  anthrome_trajectory() %>%
  mutate(var = 'Key Biodiversity Areas')

# Unused lands
unused_summary <- contemp_vars %>%
  select(id, land_area) %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  filter(time_step == '2017AD', baseline >= 50) %>%
  select(id, land_area) %>%
  remove_missing() %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline)%>%
  anthrome_trajectory() %>%
  mutate(var = 'Cultured and Wildlands (2017)')

# Potential Natural Habitat
nmh3_summary <- contemp_vars %>%
  mutate(nmh3_km2 = nmh3 * land_area) %>%
  select(id, nmh3_km2) %>%
  rename(land_area = nmh3_km2) %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>%
  anthrome_trajectory() %>%
  mutate(var = 'Potential Natural (NMH)')

# Likely Natural Habitat
nmh4_summary <- contemp_vars %>%
  mutate(nmh4_km2 = nmh_4 * land_area) %>%
  select(id, nmh4_km2) %>%
  rename(land_area = nmh4_km2) %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline)%>%
  anthrome_trajectory() %>%
  mutate(var = 'Likely Natural (NMH)')

# Shared Lands
shared_3gc_summary <- contemp_vars %>%
  filter(tgc == 2) %>%
  select(id, land_area) %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>%
  anthrome_trajectory() %>%
  mutate(var = 'Shared Lands (3GC)')

# Large Wild Areas
wild_3gc_summary <- contemp_vars %>%
  filter(tgc == 3) %>%
  select(id, land_area) %>%
  left_join(select(an12_dgg, -land_area), by = 'id') %>% 
  select(-lower, -upper) %>% rename(anthrome = baseline)%>%
  anthrome_trajectory() %>%
  mutate(var = 'Large Wild Areas (3GC)')


# combine into a single tibble for convenience
contemp_summary <- bind_rows(
  kba_summary,
  pa_summary,
  ind_summary,
  unused_summary,
  nmh3_summary,
  nmh4_summary,
  shared_3gc_summary,
  wild_3gc_summary
) %>%
  # make sure they plot in the right order
  mutate(var = factor(
    var,
    levels = c(
      'Key Biodiversity Areas',
      'Protected Areas',
      'Indigenous Lands',
      'Cultured and Wildlands (2017)',
      'Potential Natural (NMH)',
      'Likely Natural (NMH)',
      'Shared Lands (3GC)',
      'Large Wild Areas (3GC)'
    )
  ))
```

```{r include = FALSE}
# remove unneeded files to save RAM
rm(kba_summary, pa_summary, ind_summary, unused_summary, 
   nmh3_summary, nmh4_summary, shared_3gc_summary, wild_3gc_summary)
```


Save summary .csv files of everything calculated above to the data/derived_data/ directory (code not shown).

```{r, include = FALSE}
global_summary %>% 
  select(-period, -year) %>% 
  pivot_wider(id_cols = class, names_from = time_step, values_from = percent_land_area) %>%
  mutate(across(where(is.numeric), ~replace_na(.x, 0))) %>%
  left_join(anthrome_key, by = 'class') %>%
  select(class, level, type, any_of(time_steps_ordered)) %>%
  write_csv('data/derived_data/an12_dgg_summary_global.csv')

#this now saves NA region field with nothing in it
regional_summary %>%
  select(-period, -year) %>%
  pivot_wider(id_cols = c(class, region_name), names_from = time_step, values_from = percent_land_area) %>%
  arrange(region_name, class) %>%
  left_join(anthrome_key, by = 'class') %>%
  select(region_name, class, level, type, any_of(time_steps_ordered)) %>%
  mutate(across(where(is.numeric), ~replace_na(.x, 0))) %>%
  write_csv('data/derived_data/an12_dgg_summary_region.csv')

biome_summary %>%
  select(-period, -year) %>%
  pivot_wider(id_cols = c(biome, class), names_from = time_step, values_from = percent_land_area) %>%
  arrange(biome, class) %>%
  left_join(anthrome_key, by = 'class') %>%
  select(biome, class, level, type, any_of(time_steps_ordered)) %>%
  mutate(across(where(is.numeric), ~replace_na(.x, 0))) %>%
  write_csv('data/derived_data/an12_dgg_summary_biomes.csv')

contemp_summary %>%
  select(-period, -year) %>%
  pivot_wider(id_cols = c(var, class), names_from = time_step, values_from = percent_land_area) %>%
  arrange(var, class) %>%
  left_join(anthrome_key, by = 'class') %>%
  select(var, class, level, type, any_of(time_steps_ordered)) %>%
  mutate(across(where(is.numeric), ~replace_na(.x, 0))) %>%
  write_csv('data/derived_data/an12_dgg_summary_contemp.csv')
```

## HYDE Population Curves

Calculate global population trends from HYDE 3.2 data. See function documentation for *get_hyde_pop()* for more details.
```{r}
hyde_pop_all <- get_hyde_pop(hyde_pop) 
  
hyde_pop_regions <- get_hyde_pop(hyde_pop, by = region_name) 

hyde_pop_biomes <- get_hyde_pop(hyde_pop, by = biome) %>%
    filter(biome != 'Ice, snow')
```

```{r include = FALSE}
# remove hyde population data to save RAM
rm(hyde_pop)
```


## Anthrome Legacies

Fit a series of GLMs to KBAs and species richness data using the anthrome maps from each time step in turn as categorical predictors. Repeat the analysis for all global land, natural areas, protected areas, and Indigenous lands. Extract the AIC of each model to assess how strongly the anthromes from each time period are associated with the given response variable.

```{r}
# define a helper function to fit a GLM and extract it's AIC from a input formula.
calc_aic <- function(predictand, predictor, fam, dat){
  as.formula(paste(predictand, '~', predictor)) %>%
    glm(family = fam, data = dat) %>%
    AIC
}
```

Think of AIC as the amount of “divergence” between the model and the data -- a relative rather than absolute measure of model fit. You can’t meaningfully compare the divergences of same model from different datasets, only the divergences among different models from the same dataset. Rescaling the AICs to [0,1] allows us to compare across variables by thinking in terms of the “best” or “worst” models for each row.

```{r, cache = TRUE}
legacy_mods <-
  # find all combinations of response variables and predictor time steps
  expand_grid(time_step = unique(c(time_steps_millennia, 
                                   time_steps_centuries)),
              predictands = c('kba50', 'v_thr', 'v_rich')) %>%
  mutate(
    # prepare the predictor variables
    predictors = paste0('an_', time_step),
    # associate each response variable to the correct distribution
    fam = if_else(predictands == 'kba50', 'binomial', 'poisson'),
    # Global GLM, use pmap to iterate through each model
    global_aic = pmap_dbl(list(predictands, predictors, fam),calc_aic, 
                          dat = regression_dat),
    # "natural habitat"
    nmh_aic = pmap_dbl(list(predictands, predictors, fam), calc_aic, 
                       dat = filter(regression_dat, nmh_34 > 0.5)),
    # protected areas
    pa_aic = pmap_dbl(list(predictands, predictors, fam), calc_aic, 
                      dat = filter(regression_dat, pa50 == TRUE)),
    # Indigenous lands
    ind_aic = pmap_dbl(list(predictands, predictors, fam), calc_aic, 
                       dat = filter(regression_dat, ind50 == TRUE))) %>%
  pivot_longer(global_aic:ind_aic, names_to = 'domain', values_to = 'aic') %>%
  # rescale AIC values by response variable
  group_by(predictands, domain) %>%
  mutate(aic_scaled = scales::rescale(aic)) %>%
  ungroup()
```

# Figures

Reproduce all the figures in the main paper using the analyses run above. Please refer to the corresponding .Rmd source file for the plotting code.

## Figure 1 - Global Summary

```{r, fig.width=9, fig.height=3.5, echo = FALSE, warning=FALSE}
global_plot <- global_summary %>%
 mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  filter(class != 'Ice') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), position = 'fill', width = .8, size = .1) +
    geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  geom_line(data = hyde_pop_all, aes(y = pop_percent), color = 'darkred', linetype = 1, group = 1, size = .8) +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  facet_grid(~period, scales = 'free_x', space = 'free') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land area') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.1, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

global_plot
```


## Figure 2 - Regional Summary

```{r, fig.width = 9, fig.height = 8, echo = FALSE, warning=FALSE}
region_plot <- regional_summary %>% 
  filter(!is.na(region_name)) %>%
    filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  geom_line(data = hyde_pop_regions, aes(y = pop_percent), color = 'darkred', linetype = 1, group = 1, size = .7) +
  facet_wrap(~region_name, ncol = 2) +
  scale_fill_manual(values = anthrome_colors) +
  scale_color_manual(values = anthrome_colors) +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land area') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

fig2 <-region_plot + legend_long + plot_layout(widths =c(3,1))
fig2
```


## Figure 3 - Biome summary

```{r fig.width = 9, fig.height = 8, echo = FALSE, warning=FALSE}
biome_plot <- biome_summary %>% 
    filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter( class != 'NODATA', class != 'Ice', biome != 'Ice, snow') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  geom_line(data = hyde_pop_biomes, aes(y = pop_percent), color = 'darkred', linetype = 1, group = 1) +
  facet_wrap(~biome, ncol = 2, dir = 'v') +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land surface') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

fig3 <- biome_plot + legend_long + plot_layout(widths = c(3, 1))
fig3
```

## Figure 4 - Contemporary Data

```{r, include = FALSE, warning=FALSE}
contemp_plot <- contemp_summary %>%
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter(class != 'NODATA', class != 'Ice') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), position = 'fill', width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~var, ncol = 2, dir = 'h') +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land surface') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(1, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))
```
 
```{r, include = FALSE, warning=FALSE}
legacy_plot <- legacy_mods %>%
  left_join(time_key) %>%
  mutate(vars = factor(predictands, levels = rev(c('v_rich', 'v_thr', 'kba50'))),
         name = case_when(str_detect(domain, 'global') ~ 'Global Lands',
                          str_detect(domain, 'nmh') ~ 'Likely and Potential Natural Lands (NMH)',
                          str_detect(domain, 'pa') ~ 'Protected Areas',
                          str_detect(domain, 'ind') ~ 'Indigenous Lands')) %>%
  ggplot(aes(as.factor(year), vars, fill = aic_scaled), color = 'white') +
  geom_tile() +
  geom_vline(xintercept = '1', color = 'red', linetype = 2) +
  scale_fill_viridis_c(name = '', breaks = c(.05, .95), trans = 'reverse', labels = c('More informative', 'Less informative')) +
  theme_classic() +
  facet_wrap(~domain, ncol = 1) +
  scale_y_discrete(labels = c('Key Biodiversity Areas', 'Threatened Species Richness', 'Species Richness')) +
  scale_x_discrete(breaks = c(-10000, -5000, 1, 500, 1000, 1500, 2000),
                   labels = c('10000ʙᴄᴇ', '5000ʙᴄᴇ', '1ᴄᴇ', '500ᴄᴇ', '1000ᴄᴇ', '1500ᴄᴇ', '2000ᴄᴇ'))  +
  labs(x = 'Anthrome Year', y = NULL) +
  theme(strip.background = element_blank(), axis.text=element_text(size=rel(0.8)), legend.position = 'bottom')
```

```{r, fig.width=8, fig.height =9, echo = FALSE, warning=FALSE}
fig4 <- (wrap_elements(full = contemp_plot + legend_long + plot_layout(widths = c(3, 1))) / legacy_plot) + plot_layout(heights = c(1.5,1)) + plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')')

fig4
```

Save the figures to the figures/ directory (code not shown).
```{r, include = FALSE}
# save the figures
ggsave('figures/anthrome_summary_global.png', plot = global_plot, width = 9, height = 3.5, dpi = 600)
ggsave('figures/anthromes_summary_region.png', plot = fig2, height = 8, width = 9, dpi = 600)
ggsave('figures/anthromes_summary_biome.png', plot = fig3, height = 8, width = 9, dpi = 600)
ggsave('figures/anthromes_legacies.png', plot = fig4, height = 9, width = 8, dpi = 600)
```


# Supplemental Analyses and Figures
Other visualization and analyses not included in the main paper. These are not included in the knit analysis.pdf document by default as they take time and resources to run. Please refer to the analysis.rmd source document for the relevant code.

```{r}
knitr::knit_exit()
```

## Uncertainties
Visualize the anthrome uncertainties using the HYDE 3.2 upper and lower scenarios in the same way as above.

```{r}
# could modify anthrome trajectory function so it computes uncertainties automatically . . .
global_summary_upper <- an12_dgg %>% 
  select(-baseline, -lower) %>% 
  rename(anthrome = upper) %>%
  anthrome_trajectory()

global_summary_lower <- an12_dgg %>% 
  select(-baseline, -upper) %>% 
  rename(anthrome = lower) %>%
  anthrome_trajectory()

regional_summary_upper <- an12_dgg %>% 
  select(-baseline, -lower) %>% 
  rename(anthrome = upper) %>%
  anthrome_trajectory(by = region_name)

regional_summary_lower <- an12_dgg %>% 
  select(-baseline, -upper) %>% 
  rename(anthrome = lower) %>%
  anthrome_trajectory(by = region_name)

biome_summary_upper <- an12_dgg %>% 
  select(-baseline, -lower) %>% 
  rename(anthrome = upper) %>%
  anthrome_trajectory(by = biome)

biome_summary_lower <- an12_dgg %>% 
  select(-baseline, -upper) %>% 
  rename(anthrome = lower) %>% 
  anthrome_trajectory(by = biome)
```

```{r fig.width=9, fig.height=7, echo = FALSE, warning=FALSE}
a <- global_summary_upper %>%
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter(class != 'Ice', class != 'NODATA') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), position = 'fill', width = .8, size = .1)+
    geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  facet_grid(~period, scales = 'free_x', space = 'free') +
  theme_classic() +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land area', subtitle = 'Upper scenario') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.1, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

b <- global_summary_lower %>%
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter(class != 'Ice', class != 'NODATA') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), position = 'fill', width = .8, size = .1)+
    geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  facet_grid(~period, scales = 'free_x', space = 'free') +
  theme_classic() +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land area', subtitle = 'Lower scenario') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.1, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

c <- a / b + plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')')
d <- wrap_elements(full = c) + legend_long + plot_layout(widths = c(3.5, 1))
```

```{r, include = FALSE}
ggsave('figures/anthrome_uncertainty_global.png', d, width = 9, height = 7)
```


```{r fig.width = 11, fig.height = 8, echo=FALSE, warning=FALSE}
e <- regional_summary_upper %>%  filter(!is.na(region_name)) %>%
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1) +
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~region_name, ncol = 2) +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land area', subtitle = 'Upper scenario') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

f <- regional_summary_lower %>% filter(!is.na(region_name)) %>%
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~region_name, ncol = 2) +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land area', subtitle = 'Lower scenario') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

g <- e + f + plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')')

h <- wrap_elements(full = g) / legend_wide + plot_layout(heights = c(4, 1))
```

```{r, include = FALSE}
ggsave('figures/anthrome_uncertainty_regions.png', h, width = 11, height = 8, dpi = 600)
```

```{r, fig.width = 11, fig.height = 8, echo = FALSE, warning = FALSE}
i <- biome_summary_upper %>% 
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter( class != 'NODATA', class != 'Ice', biome != 'Ice, snow') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~biome, ncol = 2, dir = 'v') +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land surface', subtitle = 'Upper scenario') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

j <- biome_summary_lower %>% 
      filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter( class != 'NODATA', class != 'Ice', biome != 'Ice, snow') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~biome, ncol = 2, dir = 'v') +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land surface', subtitle = 'Lower scenario') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

k <- i + j + plot_annotation(tag_levels = 'a', tag_prefix = '(', tag_suffix = ')')

l <- wrap_elements(full = k) / legend_wide + plot_layout(heights = c(4, 1))
l
```

```{r, include = FALSE}
ggsave('figures/anthrome_uncertainty_biomes.png', l, width = 11, height = 8, dpi = 600)
```


## Onsets
Calculate the onset timing of intensive anthromes.
```{r}
# this could be a function?
intensive_onset <- an12_dgg %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>% 
    group_by(id) %>%
    filter(anthrome <= 43) %>%
    left_join(time_key) %>%
    summarise(intensive_onset = min(year, na.rm = TRUE))
```

Classify the different onset timings for easier visualization.
```{r}
change_type <- an12_dgg %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>%
  group_by(id) %>%
  summarise(intensive = any(anthrome <= 43), cultured = any(anthrome <= 54)) %>%
  left_join(intensive_onset, by = 'id') %>%
  # fyi this case when statement evaluates in order
  mutate(class = case_when(intensive == TRUE & intensive_onset <= 1 ~'U0001', 
                            intensive == TRUE & intensive_onset <= 1500 ~ 'U1500',
                            intensive == TRUE & intensive_onset <= 1850 ~ 'U1850',
                            intensive == TRUE & intensive_onset <= 1950 ~ 'U1950',
                            intensive == TRUE ~ 'RECNT',
                            cultured == TRUE ~ 'SEMI',
                            TRUE  ~ 'NEVR')) %>%
  left_join(an12_dgg_inputs, ., by = 'id') %>%
  # distingguish woody from nonwoody biomes
  mutate(class_biome = if_else(biome %in% biome_key$biome[1:8], 
                               paste0('W_', class), 
                               paste('N_', class))) %>% 
  select(id, land_area, class, class_biome)
```

Print the outputs here.
```{r}
change_type %>% 
  count(class, wt =  land_area, name = 'area') %>% 
  mutate(percent = area / sum(area) * 100)
```

Same as above, but broken down by woody and non-woody biomes.
```{r}
change_type %>% 
  count(class_biome, wt =  land_area, name = 'area') %>% 
  mutate(percent = area / sum(area) * 100)
```

The visualizations for this map were done separately in ArcGIS. Save out the required .csv here.
```{r}
write_csv(change_type, 'data/derived_data/change_type.csv')
```

## Olson Biomes

Rerun the biome-level analysis using the Olson Biomes. 
```{r}
olson_summary <- an12_dgg %>% 
  select(-lower, -upper) %>% 
  rename(anthrome = baseline) %>%
  left_join(read_csv('data/olson_biomes.csv'), by = 'id') %>%
  filter(!(olson_biome %in% c(0, NA))) %>% # the NAs are from the land mask, but what about the 0's
  mutate(olson_biome = as.factor(olson_biome)) %>%
  anthrome_trajectory(by = olson_biome) %>%
  mutate(olson_biome = as.numeric(olson_biome)) %>%
  left_join(olson_key, by = 'olson_biome')
```

```{r, fig.width=9, fig.height=10, echo = FALSE, warning = FALSE}
olson_plot <- olson_summary %>%
    filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter( class != 'NODATA') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~olson_name, ncol = 2, dir = 'v') +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land surface') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

olson_plot + legend_long + plot_layout(widths = c(3, 1))
```

```{r, include = FALSE}
ggsave('figures/olson_biomes.png', olson_plot + legend_long + plot_layout(widths = c(3, 1))
, height = 10, width = 9, dpi = 600)
```

## 15 Class Biomes
And again for the 15-class biomes.
```{r}
biome15_summary <- an12_dgg %>%
  select(-upper, -lower) %>% 
  rename(anthrome = baseline) %>%
  left_join(biome_key_15) %>%
  anthrome_trajectory(by = biome15)
```

```{r, fig.width=9, fig.height=10, echo = FALSE, warnings=FALSE}
biome15_plot <- biome15_summary %>%
    filter(!is.na(period))%>% # temporary fix, looks like the new count isn't dropping order levels and keeping years after 2000?
  mutate(time_step = ordered(time_step, levels = time_steps_ordered)) %>%
  filter( class != 'NODATA') %>%
  ggplot(aes(time_step, percent_land_area)) +
  geom_col(aes(fill = class, color = class), width = .8, size = .1)+
  geom_vline(xintercept = c('0AD', '1700AD'), color = 'black', alpha = .5) +
  geom_hline(yintercept = .5, linetype = 2, color = 'black', alpha = .25) +
  facet_wrap(~biome15, ncol = 2, dir = 'v') +
  scale_fill_manual(values = anthrome_colors, name = 'Anthrome') +
  scale_color_manual(values = anthrome_colors, name = 'Anthrome') +
  theme_classic() +
  scale_y_continuous(expand = c(0.01,0.01), labels = scales::percent_format(accuracy = 1), n.breaks = 3) +
  scale_x_discrete(breaks = c('8000BC', '0AD', '800AD', '1700AD', '1800AD', '1900AD', '2000AD'), 
                   labels = c('8000ʙᴄᴇ', '1ᴄᴇ', '800ᴄᴇ', '1700ᴄᴇ', '1800ᴄᴇ', '1900ᴄᴇ', '2000ᴄᴇ')) +
  labs(x = 'Year', y = 'Percent land surface') +
  theme(legend.position = 'none', strip.text.x = element_text(hjust = 0), panel.spacing.x=unit(.7, "lines"), strip.background = element_blank(), axis.line.y = element_blank(),  axis.text=element_text(size=rel(0.7)), panel.border = element_rect(colour = "grey20", fill=NA, size = 1))

biome15_plot + legend_long + plot_layout(widths = c(3, 1))
```

```{r include = FALSE}
ggsave('figures/biomes15.png', biome15_plot + legend_long + plot_layout(widths = c(3, 1))
, height = 10, width = 9, dpi = 600)
```

```{r, include = FALSE}
# anthromes data no longer needed, remove to conserve RAM
rm(an12_dgg)
```


## Regional Legacies

```{r}
regions_dat <- regression_dat %>%
  group_nest(region_name) %>%
  mutate(mod = map(
    data,
    ~ expand_grid(
      time_step = unique(c(
        time_steps_millennia,
        time_steps_centuries
      )),
      predictands = c('kba50', 'v_thr', 'v_rich')
    ) %>%
      mutate(
        predictors = paste0('an_', time_step),
        fam = if_else(predictands == 'kba50', 'binomial', 'poisson'),
        aic = pmap_dbl(list(predictands, predictors, fam),
                       calc_aic, dat = .)
      ) %>%
      group_by(predictands, name) %>%
      mutate(aic_scaled = scales::rescale(value)) %>%
      ungroup()
  )) %>%
  select(-data) %>%
  unnest(mod)
```



```{r, fig.width = 7.2, fig.height = 8}
regions_dat %>%
  mutate(vars = factor(predictands, levels = rev(c('v_rich', 'v_thr', 'kba50')))) %>%
  group_by(vars, region_name) %>%
  mutate(aic_scaled = scales::rescale(aic)) %>%
  ggplot(aes(as.factor(year), vars, fill = aic_scaled), color = 'white') +
  geom_tile() +
  geom_vline(xintercept = '1', color = 'red', linetype = 2) +
  facet_wrap(~region_name, ncol = 1) +
 scale_fill_viridis_c(name = '', breaks = c(.05, .95), trans = 'reverse', labels = c('More informative', 'Less informative')) +
  theme_classic() +
  scale_y_discrete(labels = c('Key Biodiversity Areas', 'Threatened Species Richness', 'Species Richness')) +
  scale_x_discrete(breaks = c(-10000, -5000, 1, 500, 1000, 1500, 2000),
                   labels = c('10000ʙᴄᴇ', '5000ʙᴄᴇ', '1ᴄᴇ', '500ᴄᴇ', '1000ᴄᴇ', '1500ᴄᴇ', '2000ᴄᴇ'))  +
  labs(x = 'Anthrome Year', y = NULL) +
  theme(strip.background = element_blank(), axis.text=element_text(size=rel(0.8)), legend.position = 'bottom')
```

```{r}
ggsave('figures/anthromes_legacies_regions.png', height = 8, width = 7.2, dpi = 600)
ggsave('figures/anthromes_legacies_regions.svg', height = 8, width = 7.2)
```
